
# Plausibility check: yearsIn (residency) should not exceed age
table(with(df, age - yearsIn), useNA = "ifany")

# The tabulation showed one value of -70 and 8 NAs. Keep only rows whose
# difference is known and non-negative; rows where it is NA are dropped too.
df <- df[with(df, !is.na(age - yearsIn) & age - yearsIn >= 0), , drop = FALSE]

# Create Quiz
# Sum of the four quiz items plus 1.
# NOTE(review): presumably the items are 0/1 so the score runs 1-5 like the
# other items - confirm against the codebook.
df$AdDTM <- with(df, address + dui + taxes + medHelp + 1)

# Create IPL-24 -----------------------------------------------------------

# Dimension scores: each is the sum of four items. An NA in any item makes
# the dimension score NA for that row.
df$pol24 <- with(df, polUnderstanding + polDiscuss + polQuiz + polAction)
df$econ24 <- with(df, income + employment3cat + expenses + emplSatisfied)
df$soc24 <- with(df, maxOrg + favors + dinner + contactsInUsCat2)
df$psy24 <- with(df, connectionHc + futureHc + isolation + outsider)
df$ling24 <- with(df, read + speak + write + listen)
df$nav24 <- with(df, seeDoctor + findJobs + legalProblems + AdDTM)

# Composite scores:
#   ipl24 - all six dimensions
#   ipl20 - all dimensions except econ24
#   ipl16 - all dimensions except econ24 and ling24
df$ipl24 <- with(df, pol24 + econ24 + soc24 + psy24 + ling24 + nav24)
df$ipl20 <- with(df, pol24 + soc24 + psy24 + ling24 + nav24)
df$ipl16 <- with(df, pol24 + soc24 + psy24 + nav24)


# Rescale IPL-24 ----------------------------------------------------------

# Each score is mapped to the target range c(0, 1); `from` is the score range
# passed to rescale().
# NOTE(review): rescale() is not defined in this part of the file; presumably
# scales::rescale - confirm. The `from` bounds equal 1x and 5x the number of
# items, which suggests every item is scored 1-5 - confirm.

# Composite scores (24, 20 and 16 items)
df[["ipl24s"]] <- rescale(df[["ipl24"]], from = c(24, 120), to = c(0, 1))
df[["ipl20s"]] <- rescale(df[["ipl20"]], from = c(20, 100), to = c(0, 1))
df[["ipl16s"]] <- rescale(df[["ipl16"]], from = c(16, 80), to = c(0, 1))

# Dimension scores (4 items each)
df[["pol24s"]] <- rescale(df[["pol24"]], from = c(4, 20), to = c(0, 1))
df[["econ24s"]] <- rescale(df[["econ24"]], from = c(4, 20), to = c(0, 1))
df[["soc24s"]] <- rescale(df[["soc24"]], from = c(4, 20), to = c(0, 1))
df[["psy24s"]] <- rescale(df[["psy24"]], from = c(4, 20), to = c(0, 1))
df[["ling24s"]] <- rescale(df[["ling24"]], from = c(4, 20), to = c(0, 1))
df[["nav24s"]] <- rescale(df[["nav24"]], from = c(4, 20), to = c(0, 1))

# IPL-12 ------------------------------------------------------------------

# Short form: each dimension score is the sum of two items. An NA in either
# item makes the dimension score NA for that row.
df$pol12 <- with(df, polUnderstanding + polDiscuss)
df$econ12 <- with(df, income + employment3cat)
df$soc12 <- with(df, dinner + contactsInUsCat2)
df$psy12 <- with(df, connectionHc + outsider)
df$ling12 <- with(df, read + speak)
df$nav12 <- with(df, seeDoctor + findJobs)

# Composite scores:
#   ipl12 - all six dimensions
#   ipl10 - all dimensions except econ12
#   ipl8  - all dimensions except econ12 and ling12
df$ipl12 <- with(df, pol12 + econ12 + soc12 + psy12 + ling12 + nav12)
df$ipl10 <- with(df, pol12 + soc12 + psy12 + ling12 + nav12)
df$ipl8 <- with(df, pol12 + soc12 + psy12 + nav12)


# Rescale the IPL-12 scores to the target range c(0, 1); `from` is the score
# range passed to rescale().
# NOTE(review): rescale() is not defined in this part of the file; presumably
# scales::rescale - confirm. The `from` bounds equal 1x and 5x the number of
# items, which suggests every item is scored 1-5 - confirm.

# Composite scores (12, 10 and 8 items)
df[["ipl12s"]] <- rescale(df[["ipl12"]], from = c(12, 60), to = c(0, 1))
df[["ipl10s"]] <- rescale(df[["ipl10"]], from = c(10, 50), to = c(0, 1))
df[["ipl8s"]] <- rescale(df[["ipl8"]], from = c(8, 40), to = c(0, 1))

# Dimension scores (2 items each)
df[["pol12s"]] <- rescale(df[["pol12"]], from = c(2, 10), to = c(0, 1))
df[["econ12s"]] <- rescale(df[["econ12"]], from = c(2, 10), to = c(0, 1))
df[["soc12s"]] <- rescale(df[["soc12"]], from = c(2, 10), to = c(0, 1))
df[["psy12s"]] <- rescale(df[["psy12"]], from = c(2, 10), to = c(0, 1))
df[["ling12s"]] <- rescale(df[["ling12"]], from = c(2, 10), to = c(0, 1))
df[["nav12s"]] <- rescale(df[["nav12"]], from = c(2, 10), to = c(0, 1))


# Immigration status ------------------------------------------------------

# Collapse immigrationStatus into three categories:
#   <= 2 -> "citizenNat"
#   == 3 -> "GreenCard_EU"
#   >  3 -> "Other temporary status"
# Rows matching none of the three conditions (including NA) stay NA.
df$immiFactor <- NA
df$immiFactor[which(df$immigrationStatus <= 2)] <- "citizenNat"
df$immiFactor[which(df$immigrationStatus == 3)] <- "GreenCard_EU"
df$immiFactor[which(df$immigrationStatus > 3)] <- "Other temporary status"

# Fix the level order; "Other temporary status" is the first level.
df$immiFactor <- factor(
  df$immiFactor,
  levels = c("Other temporary status", "GreenCard_EU", "citizenNat")
)

# Subset data -------------------------------------------------------------

# Keep a copy of the data as it stands before the exclusions below.
# NOTE(review): the name `all` is also the name of base::all().
all <- df

# Restrict df to rows with a non-missing ipl24 score and a non-missing
# immiFactor category.
df <- df[!is.na(df$ipl24) & !is.na(df$immiFactor), , drop = FALSE]

# Factor levels -----------------------------------------------------------

# Fix the level order of survey ("YouGov" first). Values not listed in
# `levels` become NA.
df$survey <- factor(
  df$survey,
  levels = c("YouGov", "Germany", "NewYork", "Allies")
)


# Scale Age, Residency and Schooling --------------------------------------

# Each variable divided by 10.
df$age10 <- df$age / 10
df$yearsIn10 <- df$yearsIn / 10
df$yschooling10 <- df$yschooling / 10

# Each variable divided by its sample standard deviation (scaled, not
# centered). na.rm = TRUE is required: without it a single NA makes sd()
# return NA, which would turn the entire *Std column into NA. Rows whose own
# value is NA still get NA.
df$ageStd <- df$age / sd(df$age, na.rm = TRUE)
df$yearsInStd <- df$yearsIn / sd(df$yearsIn, na.rm = TRUE)
df$yschoolingStd <- df$yschooling / sd(df$yschooling, na.rm = TRUE)

# Shared Language ---------------------------------------------------------

# sharedL is 1 when bcountry is on the country list that applies to the row's
# survey, and 0 otherwise. One list applies to the "Germany" survey, the
# other to every other survey.
df$sharedL <- NA

df$sharedL[df$survey == "Germany" &
             df$bcountry %in% c("Austria", "Switzerland", "Luxembourg")] <- 1

df$sharedL[df$survey != "Germany" &
             df$bcountry %in% c("Australia", "Anguilla", "Antigua and Barbuda",
                                "Canada", "Cook Islands", "Ireland", "Jamaica",
                                "New Zealand", "United Kingdom")] <- 1

# Everything not flagged above is coded 0, including rows where survey or
# bcountry is missing.
df$sharedL[is.na(df$sharedL)] <- 0

# Female dummy ------------------------------------------------------------

# Recode gender in place: 1 becomes 0, any other non-missing value becomes 1,
# and NA stays NA.
# NOTE(review): this overwrites the original coding, so running the line a
# second time on the same df flips the dummy.
df$gender <- with(df, ifelse(gender != 1, 1, 0))


